fert.data <- read_csv(here::here("fert-data.csv"))
## Parsed with column specification:
## cols(
## .default = col_character(),
## pH.experim = col_double(),
## Perc.Fertilization = col_double(),
## Insemination.mins = col_double(),
## Fert.success.mins = col_double(),
## Sperm.pre.exp.time = col_double(),
## egg.pre.exp.time = col_double(),
## pH.control = col_double(),
## pH.delta = col_double(),
## Sperm.per.uL = col_double(),
## Sperm.per.mL = col_number(),
## n.females = col_double(),
## n.males = col_double(),
## Yeear = col_double()
## )
## See spec(...) for full column specifications.
fert.data.2 <-
fert.data %>%
dplyr::select(pH.experim, Perc.Fertilization,
Insemination.mins, Fert.success.mins, Sperm.pre.exp.time,
egg.pre.exp.time, pH.delta, Sperm.per.mL, sperm.egg, n.females, n.males) %>%
mutate_if(is.factor, as.numeric) %>%
mutate_if(is.character, as.numeric)
## Warning: NAs introduced by coercion
fert.data.3 <-
fert.data %>%
mutate_at(c("Phylum", "Common name", "Brooders/Spawniers", "Family", "Taxa", "Species") , as.factor)
fert.data.3$pH.group <- cut(fert.data.3$pH.experim,c(5.9,7.3,7.5,7.8, 8.2))
fert.data.3$Phylum <- factor(fert.data.3$Phylum, levels = c("Echinodermata", "Mollusca","Cnidaria","Crustacean"))
fert.data.4 <- fert.data.3[,c(1,5,7:9,11,12,16:19)] %>%
mutate_if(is.factor, as.numeric) %>% #convert factors to numeric factors
mutate_if(is.character, as.numeric) #convert character column to numeric
## Warning: NAs introduced by coercion
pairs(na.omit(fert.data.4), lower.panel=panel.smooth, upper.panel=panel.cor)
# save to pdf
pdf(file = "fert-correlation-panel.pdf", width = 12, height = 8.5)
pairs(na.omit(fert.data.4), lower.panel=panel.smooth, upper.panel=panel.cor)
dev.off()
## quartz_off_screen
## 2
Gowers allows for missing data and multiple data types
dist.gower <- vegdist(fert.data.4, "gower")
Its usage is: cmdscale(d, k, eig = FALSE, add = FALSE) where: • d is a dissimilarity object (generated by dist or vegdist) • k is the number of principal components (PC) that should be extracted from the distance matrix (max number = min(col, rows)-1) • eig, logical. If TRUE eigenvalues for each PC are retuned. Default: FALSE. • add, logical. If TRUE a constant is added to each value in the dissimilarity matrix so that the resulting eigenvalues are non-negative. Default: FALSE.
The principal scores are contained in spe.pcoa$points and the eigenvalues are contained in spe.pcoa$eig.
spe.pcoa <- cmdscale(dist.gower, eig=T, add=T, k=2)
head(spe.pcoa$points, n=15)
## [,1] [,2]
## [1,] -0.6312144 -1.10963653
## [2,] -0.6659012 -0.91498453
## [3,] -0.8442947 -0.31278500
## [4,] -0.8393372 -0.28372659
## [5,] -0.6733455 -0.98486550
## [6,] -0.6858787 -0.86243567
## [7,] -0.6356584 -1.09620511
## [8,] -0.6406292 -0.99456059
## [9,] -1.0926354 -0.33980630
## [10,] -1.7569963 0.33231125
## [11,] -1.1538052 0.04844774
## [12,] -1.9954500 0.57209282
## [13,] -2.0591751 1.01716657
## [14,] -2.0589501 1.00410994
## [15,] -0.8746690 -0.95091085
head(spe.pcoa$eig, n=15)
## [1] 372.08580 213.66589 152.90546 98.83822 85.38434 78.17744 76.82164
## [8] 71.22881 67.85222 60.90775 59.10680 55.45987 51.75052 50.15431
## [15] 47.64539
hist(spe.pcoa$eig/sum(spe.pcoa$eig)*100)
plot(spe.pcoa$eig[1:100]/sum(spe.pcoa$eig)*100,type="b",lwd=2,col="blue",xlab= "Principal Component from PCoA", ylab="% variation explained", main="% variation explained by PCoA (blue) vs. random expectation (red)")
lines(bstick(100)*100,type="b",lwd=2,col="red")
This plot represents each of the sites in 2-D ordination space (x-axis = principal component 1, y-axis = principal component 2). (Should try to use something relevant for text)
Calculate and depict species loadings (i.e., principal weights in the eigenvectors) on each principal coordinate. Use the function envfit() along with the PC scores from our PCoA object. The function envfit() performs a linear correlation analysis based on standardized data (in other words, a simple linear regression) between each of the original descriptors (i.e., species) and the scores from each principal component. A permutation test is used to assess statistical significance, rather than using the F distribution.
print(vec.sp<-envfit(spe.pcoa$points, k=45, fert.data.4, perm=1000, na.rm=T))
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## Phylum -0.89708 -0.44188 0.8764 0.000999 ***
## Taxa 0.90930 0.41613 0.8948 0.000999 ***
## pH.experim 0.04069 -0.99917 0.2440 0.000999 ***
## Perc.Fertilization 0.21783 -0.97599 0.7805 0.000999 ***
## Insemination.mins -0.20953 0.97780 0.0975 0.002997 **
## Sperm.pre.exp.time -0.81658 0.57723 0.0596 0.044955 *
## egg.pre.exp.time -0.77307 0.63433 0.5632 0.000999 ***
## Sperm.per.mL -0.78064 0.62498 0.5712 0.000999 ***
## sperm.egg -0.99769 0.06788 0.6149 0.000999 ***
## n.females -0.65602 0.75474 0.5176 0.000999 ***
## n.males -0.67094 0.74151 0.2163 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
##
## 340 observations deleted due to missingness
p.max is the significance level that the species occurrence data must have with either PC in order to be depicted (these p-values were presented in vec.sp).
fert.data.4$pH.group <- cut(fert.data.4$pH.experim, c(6,7.3,7.5,7.8, 8.2))
pl <- ordiplot(spe.pcoa, type = "none", xlim = c(-1,1.5))
## species scores not available
points(pl, "sites", cex=0.8, pch=c(21,22,23,24)[fert.data.4$pH.group], bg=c("red","blue","green","purple")[fert.data.4$Phylum])
plot(vec.sp, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors.
legend(x="topright", legend = levels(fert.data.3$Phylum), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="right", legend = levels(fert.data.3$pH.group),pch=c(21,22,23,24))
# save to pdf
pdf(file = "fert.PCoA.pdf", width = 9, height = 8)
pl <- ordiplot(spe.pcoa, type = "none", xlim = c(-1,1.5))
## species scores not available
points(pl, "sites", cex=0.8, pch=c(21,22,23,24)[fert.data.4$pH.group], bg=c("red","blue","green","purple")[fert.data.4$Phylum])
plot(vec.sp, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors.
legend(x="topright", legend = levels(fert.data.3$Phylum), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="right", legend = levels(fert.data.3$pH.group),pch=c(21,22,23,24))
dev.off()
## quartz_off_screen
## 2
dist.gower2 <- vegdist(fert.data.4[c(3:11)], "gower", na.rm = F)
spe.pcoa2 <- cmdscale(dist.gower2, k=2, eig=T, add=T)
print(vec.sp2<-envfit(spe.pcoa2$points, k=45, fert.data.4[c(3:11)], perm=1000, na.rm=T))
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## pH.experim -0.73057 -0.68284 0.2953 0.000999 ***
## Perc.Fertilization -0.90933 0.41608 0.9917 0.000999 ***
## Insemination.mins 0.55402 0.83251 0.1267 0.003996 **
## Sperm.pre.exp.time 0.46661 0.88446 0.0444 0.065934 .
## egg.pre.exp.time 0.50435 0.86350 0.6414 0.000999 ***
## Sperm.per.mL 0.50324 0.86415 0.6414 0.000999 ***
## sperm.egg 0.55276 0.83334 0.2141 0.000999 ***
## n.females 0.54147 0.84072 0.6670 0.000999 ***
## n.males 0.54651 0.83745 0.2314 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 1000
##
## 340 observations deleted due to missingness
pl2 <- ordiplot(spe.pcoa2, type = "none", xlim = c(-1,1.5))
## species scores not available
points(pl2, "sites", cex=1, pch=c(21,22,23,24)[fert.data.4$Phylum],
bg=c("red","blue","green","purple")[fert.data.4$pH.group])
plot(vec.sp2, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors.
legend(x="topright", legend = levels(fert.data.3$pH.group), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="bottomright", legend = levels(fert.data.3$Phylum),pch=c(21,22,23,24))
# Save to pdf
pdf(file = "fert.PCoA-noPhylum.pdf", width = 9, height = 8)
pl2 <- ordiplot(spe.pcoa2, type = "none", xlim = c(-1,1.5))
## species scores not available
points(pl2, "sites", cex=1, pch=c(21,22,23,24)[fert.data.4$Phylum],
bg=c("red","blue","green","purple")[fert.data.4$pH.group])
plot(vec.sp2, p.max=.01, col="black") # note, p.max = set p-value threshold for plotting vectors.
legend(x="topright", legend = levels(fert.data.3$pH.group), col=c("red","blue","green", "purple"), pch=c(16,16,16,16))
legend(x="bottomright", legend = levels(fert.data.3$Phylum),pch=c(21,22,23,24))
dev.off()
## quartz_off_screen
## 2
hist(fert.data.4$Perc.Fertilization)
# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
ggtitle("Fertilization Rate ~ pH exposure by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, col=Taxa, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
facet_wrap(~Phylum, scale="free") +
geom_smooth(method="lm", se=TRUE, aes(fill=Taxa)) +
ggtitle("Fertilization Rate ~ pH exposure by phylum") +
theme_minimal())
## Warning: Ignoring unknown parameters: width
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning in qt((1 - level)/2, df): NaNs produced
fert<-subset(fert.data.3, Phylum=="Mollusca")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Mollusca")$pH.experim
summary(model1 <- lm(fert~ph))
##
## Call:
## lm(formula = fert ~ ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.139 -17.522 2.456 17.871 54.092
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -82.620 27.636 -2.990 0.00329 **
## ph 19.774 3.609 5.479 1.86e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.11 on 144 degrees of freedom
## Multiple R-squared: 0.1725, Adjusted R-squared: 0.1668
## F-statistic: 30.02 on 1 and 144 DF, p-value: 1.858e-07
plot(ph,fert,pch=21,col="brown",bg="yellow")
abline(model1,col="navy")
summary(model2 <- lm(fert~ph+I(ph^2)))
##
## Call:
## lm(formula = fert ~ ph + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.919 -16.309 2.201 18.863 54.523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -566.804 272.269 -2.082 0.0391 *
## ph 155.229 75.867 2.046 0.0426 *
## I(ph^2) -9.390 5.253 -1.787 0.0760 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.93 on 143 degrees of freedom
## Multiple R-squared: 0.1906, Adjusted R-squared: 0.1793
## F-statistic: 16.84 on 2 and 143 DF, p-value: 2.715e-07
x <- c(5.5, 5.6, 5.7, 5.8, 5.9, 6, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3)
y <- predict(model2,list(ph=x))
plot(ph,fert,pch=21,col="brown",bg="yellow")
lines(x,y,col="navy")
anova(model1, model2) # simple model as good as polynomial model.
## Analysis of Variance Table
##
## Model 1: fert ~ ph
## Model 2: fert ~ ph + I(ph^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 144 76898
## 2 143 75217 1 1680.5 3.1949 0.07599 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(model1$residuals) #check residuals
plot(model1)
taxa <- as.factor(subset(fert.data.3, Phylum=="Mollusca")$Taxa)
summary(model2 <- lm(fert~ph+taxa)) #common slope, different intercepts by taxa
##
## Call:
## lm(formula = fert ~ ph + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.74 -10.74 -1.31 11.32 37.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -77.857 21.707 -3.587 0.000462 ***
## ph 21.792 2.733 7.975 4.88e-13 ***
## taxaclam -39.129 4.849 -8.069 2.88e-13 ***
## taxamussel -0.952 4.873 -0.195 0.845387
## taxaoyster -30.745 4.343 -7.080 6.40e-11 ***
## taxascallop -12.163 6.462 -1.882 0.061884 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.82 on 140 degrees of freedom
## Multiple R-squared: 0.5739, Adjusted R-squared: 0.5587
## F-statistic: 37.71 on 5 and 140 DF, p-value: < 2.2e-16
anova(model1, model2) # definitely need to include sep. lines for taxa w/ diff intercepts
## Analysis of Variance Table
##
## Model 1: fert ~ ph
## Model 2: fert ~ ph + taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 144 76898
## 2 140 39599 4 37299 32.967 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3 <- lm(fert~ph*taxa)) # test for diff slopes and intercepts
##
## Call:
## lm(formula = fert ~ ph * taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.940 -10.699 -1.237 11.939 37.034
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -106.669 136.601 -0.781 0.436
## ph 25.472 17.441 1.461 0.146
## taxaclam -123.099 152.267 -0.808 0.420
## taxamussel 28.897 140.807 0.205 0.838
## taxaoyster 14.390 139.815 0.103 0.918
## taxascallop 133.018 188.389 0.706 0.481
## ph:taxaclam 11.097 19.537 0.568 0.571
## ph:taxamussel -3.820 18.038 -0.212 0.833
## ph:taxaoyster -5.803 17.864 -0.325 0.746
## ph:taxascallop -18.599 24.090 -0.772 0.441
##
## Residual standard error: 16.82 on 136 degrees of freedom
## Multiple R-squared: 0.586, Adjusted R-squared: 0.5586
## F-statistic: 21.39 on 9 and 136 DF, p-value: < 2.2e-16
anova(model2, model3) # don't need diff slopes
## Analysis of Variance Table
##
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph * taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 140 39599
## 2 136 38476 4 1123.1 0.9925 0.414
summary(model4 <- lm(fert~ph+I(ph^2) + taxa)) #test polynomial with taxa
##
## Call:
## lm(formula = fert ~ ph + I(ph^2) + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.863 -10.514 0.447 8.640 37.702
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -597.5530 203.8928 -2.931 0.00395 **
## ph 167.1563 56.7824 2.944 0.00380 **
## I(ph^2) -10.0811 3.9335 -2.563 0.01144 *
## taxaclam -40.3816 4.7805 -8.447 3.57e-14 ***
## taxamussel -0.7367 4.7797 -0.154 0.87773
## taxaoyster -29.2334 4.2994 -6.799 2.85e-10 ***
## taxascallop -11.9372 6.3381 -1.883 0.06173 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.49 on 139 degrees of freedom
## Multiple R-squared: 0.5931, Adjusted R-squared: 0.5755
## F-statistic: 33.77 on 6 and 139 DF, p-value: < 2.2e-16
anova(model2, model4) # improves the model
## Analysis of Variance Table
##
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph + I(ph^2) + taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 140 39599
## 2 139 37812 1 1786.8 6.5683 0.01144 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model5 <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa)) #<----------final model
##
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.526 -10.396 0.056 10.468 33.947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9401.480 2385.708 -3.941 0.000129 ***
## ph 3887.105 1006.131 3.863 0.000171 ***
## I(ph^2) -530.433 140.584 -3.773 0.000239 ***
## I(ph^3) 24.116 6.513 3.703 0.000308 ***
## taxaclam -40.635 4.576 -8.879 3.19e-15 ***
## taxamussel -2.877 4.611 -0.624 0.533666
## taxaoyster -31.339 4.155 -7.543 5.56e-12 ***
## taxascallop -14.001 6.092 -2.298 0.023058 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.79 on 138 degrees of freedom
## Multiple R-squared: 0.6299, Adjusted R-squared: 0.6111
## F-statistic: 33.55 on 7 and 138 DF, p-value: < 2.2e-16
anova(model4, model5) # improves the model
## Analysis of Variance Table
##
## Model 1: fert ~ ph + I(ph^2) + taxa
## Model 2: fert ~ ph + I(ph^2) + I(ph^3) + taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 139 37812
## 2 138 34395 1 3417.1 13.71 0.0003075 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model1, model2, model3, model4, model5) #AIC confirms that model5 is best
## df AIC
## model1 3 1335.258
## model2 7 1246.362
## model3 11 1250.161
## model4 8 1241.621
## model5 9 1229.792
hist(model5$residuals) #residuals look pretty normal
plot(model5) #i see no major issues with residuals
lm.mollusc <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa) #<----------rename model
summary(lm.mollusc)
##
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.526 -10.396 0.056 10.468 33.947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9401.480 2385.708 -3.941 0.000129 ***
## ph 3887.105 1006.131 3.863 0.000171 ***
## I(ph^2) -530.433 140.584 -3.773 0.000239 ***
## I(ph^3) 24.116 6.513 3.703 0.000308 ***
## taxaclam -40.635 4.576 -8.879 3.19e-15 ***
## taxamussel -2.877 4.611 -0.624 0.533666
## taxaoyster -31.339 4.155 -7.543 5.56e-12 ***
## taxascallop -14.001 6.092 -2.298 0.023058 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.79 on 138 degrees of freedom
## Multiple R-squared: 0.6299, Adjusted R-squared: 0.6111
## F-statistic: 33.55 on 7 and 138 DF, p-value: < 2.2e-16
predict.mollusc <- predict(lm.mollusc, interval = 'confidence')
predict.mollusc.df <- predict.mollusc %>%
as.data.frame() %>%
cbind(subset(fert.data.3, Phylum=="Mollusca")$pH.experim, as.factor(subset(fert.data.3, Phylum=="Mollusca")$Taxa)) %>%
purrr::set_names(c("fit", "lwr", "upr", "ph", "taxa"))
predict.mollusc.df$taxa <- factor(predict.mollusc.df$taxa, levels=c("abalone", "mussel", "scallop", "oyster", "clam"))
fert.data.3 %>%
filter(Phylum=="Mollusca") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.mollusc.df$fit, col=predict.mollusc.df$taxa)) +
ggtitle("Mollusca") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name="Taxa",
values=c(abalone="#e41a1c",mussel="#4daf4a",scallop="#ff7f00",oyster="#984ea3",clam='#377eb8')) +
theme_minimal()
fert<-subset(fert.data.3, Phylum=="Echinodermata")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Echinodermata")$pH.experim
taxa <- as.factor(subset(fert.data.3, Phylum=="Echinodermata")$Taxa)
summary(model1 <- lm(fert~ph))
##
## Call:
## lm(formula = fert ~ ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -75.318 -10.303 6.583 17.683 53.083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -239.589 31.313 -7.651 5.29e-13 ***
## ph 40.501 4.073 9.943 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26 on 232 degrees of freedom
## Multiple R-squared: 0.2988, Adjusted R-squared: 0.2958
## F-statistic: 98.86 on 1 and 232 DF, p-value: < 2.2e-16
hist(model1$residuals)
plot(ph,fert,pch=21,col="brown",bg="yellow")
abline(model1,col="navy")
x <- c(5.5, 5.6, 5.7, 5.8, 5.9, 6, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3)
summary(model2 <- lm(fert~ph+I(ph^2)))
##
## Call:
## lm(formula = fert ~ ph + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.718 -7.789 9.085 17.002 51.480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1101.486 314.930 -3.498 0.000563 ***
## ph 274.791 85.290 3.222 0.001457 **
## I(ph^2) -15.848 5.763 -2.750 0.006431 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.64 on 231 degrees of freedom
## Multiple R-squared: 0.321, Adjusted R-squared: 0.3152
## F-statistic: 54.61 on 2 and 231 DF, p-value: < 2.2e-16
y <- predict(model2,list(ph=x))
plot(ph,fert,pch=21,col="brown",bg="yellow")
lines(x,y,col="navy")
anova(model1, model2)
## Analysis of Variance Table
##
## Model 1: fert ~ ph
## Model 2: fert ~ ph + I(ph^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 232 156810
## 2 231 151839 1 4971.1 7.5627 0.006431 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model1 <- lm(fert~ph))
##
## Call:
## lm(formula = fert ~ ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -75.318 -10.303 6.583 17.683 53.083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -239.589 31.313 -7.651 5.29e-13 ***
## ph 40.501 4.073 9.943 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26 on 232 degrees of freedom
## Multiple R-squared: 0.2988, Adjusted R-squared: 0.2958
## F-statistic: 98.86 on 1 and 232 DF, p-value: < 2.2e-16
summary(model2 <- lm(fert~ph+I(ph^2)))
##
## Call:
## lm(formula = fert ~ ph + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.718 -7.789 9.085 17.002 51.480
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1101.486 314.930 -3.498 0.000563 ***
## ph 274.791 85.290 3.222 0.001457 **
## I(ph^2) -15.848 5.763 -2.750 0.006431 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.64 on 231 degrees of freedom
## Multiple R-squared: 0.321, Adjusted R-squared: 0.3152
## F-statistic: 54.61 on 2 and 231 DF, p-value: < 2.2e-16
anova(model1, model2) #2nd order polynomial better fit than straight line
## Analysis of Variance Table
##
## Model 1: fert ~ ph
## Model 2: fert ~ ph + I(ph^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 232 156810
## 2 231 151839 1 4971.1 7.5627 0.006431 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3 <- lm(fert~ph+taxa)) # test different intercepts by taxa, common slope
##
## Call:
## lm(formula = fert ~ ph + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76.846 -9.951 4.468 16.030 62.004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -266.804 30.053 -8.878 < 2e-16 ***
## ph 39.891 3.851 10.360 < 2e-16 ***
## taxaSea star 38.990 7.868 4.956 1.40e-06 ***
## taxaSea urchin 33.623 6.390 5.262 3.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.56 on 230 degrees of freedom
## Multiple R-squared: 0.3797, Adjusted R-squared: 0.3716
## F-statistic: 46.93 on 3 and 230 DF, p-value: < 2.2e-16
anova(model1, model3) #different intercept by taxa improves model
## Analysis of Variance Table
##
## Model 1: fert ~ ph
## Model 2: fert ~ ph + taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 232 156810
## 2 230 138714 2 18096 15.003 7.511e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model4 <- lm(fert~ph*taxa))# test diff slopes AND intercepts by taxa
##
## Call:
## lm(formula = fert ~ ph * taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76.629 -9.125 4.466 16.097 62.140
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -292.7922 112.1737 -2.610 0.00965 **
## ph 43.2924 14.6602 2.953 0.00348 **
## taxaSea star 36.8277 150.4571 0.245 0.80685
## taxaSea urchin 64.7199 116.7554 0.554 0.57990
## ph:taxaSea star 0.2466 19.5799 0.013 0.98996
## ph:taxaSea urchin -4.0673 15.2539 -0.267 0.78999
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.66 on 228 degrees of freedom
## Multiple R-squared: 0.3802, Adjusted R-squared: 0.3666
## F-statistic: 27.97 on 5 and 228 DF, p-value: < 2.2e-16
anova(model3, model4) # slopes not important
## Analysis of Variance Table
##
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph * taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 230 138714
## 2 228 138618 2 95.941 0.0789 0.9242
summary(model5 <- lm(fert~ph+taxa+I(ph^2))) #test 2nd order polynomial with taxa intercepts
##
## Call:
## lm(formula = fert ~ ph + taxa + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -75.011 -7.635 7.262 15.482 58.831
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1151.728 296.737 -3.881 0.000136 ***
## ph 280.369 80.327 3.490 0.000579 ***
## taxaSea star 39.174 7.735 5.064 8.42e-07 ***
## taxaSea urchin 33.925 6.283 5.400 1.67e-07 ***
## I(ph^2) -16.266 5.427 -2.997 0.003026 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.14 on 229 degrees of freedom
## Multiple R-squared: 0.4031, Adjusted R-squared: 0.3927
## F-statistic: 38.67 on 4 and 229 DF, p-value: < 2.2e-16
anova(model3, model5) # adding 2nd order improves ph+taxa model.
## Analysis of Variance Table
##
## Model 1: fert ~ ph + taxa
## Model 2: fert ~ ph + taxa + I(ph^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 230 138714
## 2 229 133478 1 5235.6 8.9824 0.003026 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model2, model5) # adding taxa improves ph+2nd order model.
## Analysis of Variance Table
##
## Model 1: fert ~ ph + I(ph^2)
## Model 2: fert ~ ph + taxa + I(ph^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 231 151839
## 2 229 133478 2 18361 15.75 3.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model6 <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa)) # test adding 3rd order <----------final model
##
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -75.790 -7.006 7.503 14.710 56.847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7251.801 3050.313 2.377 0.01826 *
## ph -3222.939 1268.245 -2.541 0.01171 *
## I(ph^2) 468.019 175.057 2.674 0.00805 **
## I(ph^3) -22.205 8.023 -2.768 0.00611 **
## taxaSea star 38.558 7.628 5.055 8.85e-07 ***
## taxaSea urchin 32.236 6.224 5.180 4.89e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.8 on 228 degrees of freedom
## Multiple R-squared: 0.4225, Adjusted R-squared: 0.4099
## F-statistic: 33.37 on 5 and 228 DF, p-value: < 2.2e-16
anova(model5, model6) # adding 3rd order improves model
## Analysis of Variance Table
##
## Model 1: fert ~ ph + taxa + I(ph^2)
## Model 2: fert ~ ph + I(ph^2) + I(ph^3) + taxa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 229 133478
## 2 228 129139 1 4338.8 7.6603 0.006109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(model6$residuals) #residuals kind of normal ?
plot(model6) #doesn't look totally okay. should follow up.
lm.echino <- lm(fert~ph+I(ph^2)+I(ph^3)+ taxa)
summary(lm.echino)
##
## Call:
## lm(formula = fert ~ ph + I(ph^2) + I(ph^3) + taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -75.790 -7.006 7.503 14.710 56.847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7251.801 3050.313 2.377 0.01826 *
## ph -3222.939 1268.245 -2.541 0.01171 *
## I(ph^2) 468.019 175.057 2.674 0.00805 **
## I(ph^3) -22.205 8.023 -2.768 0.00611 **
## taxaSea star 38.558 7.628 5.055 8.85e-07 ***
## taxaSea urchin 32.236 6.224 5.180 4.89e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.8 on 228 degrees of freedom
## Multiple R-squared: 0.4225, Adjusted R-squared: 0.4099
## F-statistic: 33.37 on 5 and 228 DF, p-value: < 2.2e-16
predict.echino <- predict(lm.echino, interval = 'confidence')
predict.echino.df <- predict.echino %>%
as.data.frame() %>%
cbind(subset(fert.data.3, Phylum=="Echinodermata")$pH.experim, as.factor(subset(fert.data.3, Phylum=="Echinodermata")$Taxa)) %>%
purrr::set_names(c("fit", "lwr", "upr", "ph", "taxa"))
predict.echino.df$taxa <- factor(predict.echino.df$taxa, levels=c("Sea star", "Sea urchin", "Sand dollar"))
fert.data.3 %>%
filter(Phylum=="Echinodermata") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.echino.df$fit, col=predict.echino.df$taxa)) +
ggtitle("Echinodermata") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name="Taxa",
values=c(`Sand dollar`="#e41a1c",
`Sea star`="#4daf4a",
`Sea urchin`="#ff7f00")) + theme_minimal()
fert<-subset(fert.data.3, Phylum=="Cnidaria")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Cnidaria")$pH.experim
sperm <- subset(fert.data.3, Phylum=="Cnidaria")$Sperm.per.mL
ph.group <- as.factor(subset(fert.data.3, Phylum=="Cnidaria")$pH.group)
x <- c(7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3)
summary(model0 <- lm(fert~ph)) #pH not sign. alone
##
## Call:
## lm(formula = fert ~ ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.637 -25.248 -4.231 33.430 49.855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -331.51 230.16 -1.440 0.1551
## ph 48.93 29.14 1.679 0.0985 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.27 on 59 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.04559, Adjusted R-squared: 0.02942
## F-statistic: 2.819 on 1 and 59 DF, p-value: 0.09847
summary(model1 <- lm(fert~ph.group)) # ph.group not sign. alone
##
## Call:
## lm(formula = fert ~ ph.group)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.95 -25.12 -5.87 35.03 51.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.768 7.005 6.819 5.47e-09 ***
## ph.group(7.8,8.2] 11.352 8.875 1.279 0.206
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.59 on 59 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.02698, Adjusted R-squared: 0.01049
## F-statistic: 1.636 on 1 and 59 DF, p-value: 0.2059
summary(model2 <- lm(fert~sperm)) # sperm concentration sign. factor
##
## Call:
## lm(formula = fert ~ sperm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.190 -19.684 -6.424 34.686 46.936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.199e+01 4.276e+00 12.158 <2e-16 ***
## sperm 7.840e-07 3.411e-07 2.299 0.025 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.41 on 60 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.08095, Adjusted R-squared: 0.06563
## F-statistic: 5.285 on 1 and 60 DF, p-value: 0.02501
summary(model3 <- lm(fert~sperm+ph)) # pH.group not sign. after controlling for sperm conc. intercept
##
## Call:
## lm(formula = fert ~ sperm + ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.388 -22.842 -2.913 32.158 51.155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.406e+02 2.219e+02 -1.535 0.1302
## sperm 7.909e-07 3.376e-07 2.343 0.0226 *
## ph 4.974e+01 2.810e+01 1.770 0.0819 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.07 on 58 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1281, Adjusted R-squared: 0.09806
## F-statistic: 4.262 on 2 and 58 DF, p-value: 0.01876
summary(model4 <- lm(fert~sperm+ph.group)) # pH.group not sign. after controlling for sperm conc. intercept
##
## Call:
## lm(formula = fert ~ sperm + ph.group)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.882 -22.771 -4.626 31.833 53.144
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.436e+01 6.896e+00 6.432 2.62e-08 ***
## sperm 8.092e-07 3.409e-07 2.374 0.0209 *
## ph.group(7.8,8.2] 1.241e+01 8.557e+00 1.450 0.1525
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.35 on 58 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1131, Adjusted R-squared: 0.08256
## F-statistic: 3.7 on 2 and 58 DF, p-value: 0.03074
summary(model5 <- lm(fert~sperm*ph)) # pH not sign. after controlling for sperm conc. intercept
##
## Call:
## lm(formula = fert ~ sperm * ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.867 -23.524 -3.503 31.476 51.425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.635e+02 2.298e+02 -1.582 0.1192
## sperm 1.066e-05 2.307e-05 0.462 0.6460
## ph 5.264e+01 2.910e+01 1.809 0.0757 .
## sperm:ph -1.249e-06 2.922e-06 -0.428 0.6706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.3 on 57 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1309, Adjusted R-squared: 0.08517
## F-statistic: 2.862 on 3 and 57 DF, p-value: 0.04465
summary(model6 <- lm(fert~sperm*ph.group)) # ph.group not sign. after controlling for sperm
##
## Call:
## lm(formula = fert ~ sperm * ph.group)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.24 -23.18 -4.75 32.72 53.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.374e+01 7.099e+00 6.162 7.79e-08 ***
## sperm 9.548e-07 4.888e-07 1.953 0.0557 .
## ph.group(7.8,8.2] 1.343e+01 8.961e+00 1.499 0.1394
## sperm:ph.group(7.8,8.2] -2.874e-07 6.867e-07 -0.419 0.6771
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.58 on 57 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1159, Adjusted R-squared: 0.06933
## F-statistic: 2.49 on 3 and 57 DF, p-value: 0.06941
summary(model3 <- lm(fert~sperm+ph))
##
## Call:
## lm(formula = fert ~ sperm + ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.388 -22.842 -2.913 32.158 51.155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.406e+02 2.219e+02 -1.535 0.1302
## sperm 7.909e-07 3.376e-07 2.343 0.0226 *
## ph 4.974e+01 2.810e+01 1.770 0.0819 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.07 on 58 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1281, Adjusted R-squared: 0.09806
## F-statistic: 4.262 on 2 and 58 DF, p-value: 0.01876
summary(model7 <- lm(fert~sperm+ph+I(ph^2)))
##
## Call:
## lm(formula = fert ~ sperm + ph + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.169 -22.309 -2.716 32.691 51.073
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.245e+03 1.502e+04 -0.083 0.9342
## sperm 7.887e-07 3.425e-07 2.303 0.0249 *
## ph 2.787e+02 3.801e+03 0.073 0.9418
## I(ph^2) -1.448e+01 2.403e+02 -0.060 0.9522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.35 on 57 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1282, Adjusted R-squared: 0.0823
## F-statistic: 2.794 on 3 and 57 DF, p-value: 0.04842
summary(model8 <- lm(fert~sperm+ph+I(ph^2)+I(ph^3)))
##
## Call:
## lm(formula = fert ~ sperm + ph + I(ph^2) + I(ph^3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.56 -24.36 -3.36 29.84 53.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.127e+06 9.981e+05 -1.129 0.2638
## sperm 7.849e-07 3.417e-07 2.297 0.0254 *
## ph 4.265e+05 3.779e+05 1.128 0.2639
## I(ph^2) -5.380e+04 4.769e+04 -1.128 0.2641
## I(ph^3) 2.262e+03 2.006e+03 1.128 0.2642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.28 on 56 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1475, Adjusted R-squared: 0.08665
## F-statistic: 2.423 on 4 and 56 DF, p-value: 0.05875
summary(model9 <- lm(fert~ph+sperm+I(sperm^2))) #<--- lowest AIC
##
## Call:
## lm(formula = fert ~ ph + sperm + I(sperm^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -59.882 -22.759 -2.132 25.616 53.629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.741e+02 2.168e+02 -1.726 0.0898 .
## ph 5.337e+01 2.743e+01 1.946 0.0566 .
## sperm 5.355e-06 2.272e-06 2.357 0.0219 *
## I(sperm^2) -6.703e-14 3.302e-14 -2.030 0.0470 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.24 on 57 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1869, Adjusted R-squared: 0.1441
## F-statistic: 4.368 on 3 and 57 DF, p-value: 0.007748
summary(model10 <- lm(fert~ph+sperm+I(sperm^2)+I(sperm^3)))
##
## Call:
## lm(formula = fert ~ ph + sperm + I(sperm^2) + I(sperm^3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.621 -21.172 -4.073 26.612 54.165
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.650e+02 2.167e+02 -1.685 0.0976 .
## ph 5.261e+01 2.740e+01 1.920 0.0599 .
## sperm -1.298e-06 6.605e-06 -0.197 0.8449
## I(sperm^2) 9.018e-13 9.039e-13 0.998 0.3227
## I(sperm^3) -1.272e-20 1.186e-20 -1.073 0.2881
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.2 on 56 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.2033, Adjusted R-squared: 0.1464
## F-statistic: 3.572 on 4 and 56 DF, p-value: 0.01153
summary(model11 <- lm(fert~ph.group+sperm+I(sperm^2)))
##
## Call:
## lm(formula = fert ~ ph.group + sperm + I(sperm^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.929 -22.458 -6.036 30.681 55.359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.948e+01 7.203e+00 5.481 9.99e-07 ***
## ph.group(7.8,8.2] 1.281e+01 8.368e+00 1.531 0.1314
## sperm 5.176e-06 2.296e-06 2.255 0.0280 *
## I(sperm^2) -6.413e-14 3.336e-14 -1.923 0.0595 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.62 on 57 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1671, Adjusted R-squared: 0.1233
## F-statistic: 3.813 on 3 and 57 DF, p-value: 0.01466
AIC(model0, model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11)
## Warning in AIC.default(model0, model1, model2, model3, model4, model5,
## model6, : models are not all fitted to the same number of observations
## df AIC
## model0 3 604.6512
## model1 3 605.8295
## model2 3 611.2413
## model3 4 601.1340
## model4 4 602.1735
## model5 5 602.9387
## model6 5 603.9863
## model7 5 603.1301
## model8 6 603.7602
## model9 5 598.8751
## model10 6 599.6347
## model11 5 600.3409
anova(model3, model9) #model 9 improves model 3 by adding a 2nd order polynomial sperm variable
## Analysis of Variance Table
##
## Model 1: fert ~ sperm + ph
## Model 2: fert ~ ph + sperm + I(sperm^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 58 59665
## 2 57 55642 1 4023.6 4.1219 0.04701 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(model9$residuals) #kinda normal
plot(model9) #somewhat concerning ... check back later
lm.cnid <- lm(fert~ph+sperm+I(sperm^2), na.action = na.exclude)
summary(lm.cnid)
##
## Call:
## lm(formula = fert ~ ph + sperm + I(sperm^2), na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -59.882 -22.759 -2.132 25.616 53.629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.741e+02 2.168e+02 -1.726 0.0898 .
## ph 5.337e+01 2.743e+01 1.946 0.0566 .
## sperm 5.355e-06 2.272e-06 2.357 0.0219 *
## I(sperm^2) -6.703e-14 3.302e-14 -2.030 0.0470 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.24 on 57 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1869, Adjusted R-squared: 0.1441
## F-statistic: 4.368 on 3 and 57 DF, p-value: 0.007748
predict.cnid <- predict(lm.cnid, interval = 'confidence')
predict.cnid.df <- cbind(as.data.frame(predict.cnid),
subset(fert.data.3, Phylum=="Cnidaria")$pH.experim,
subset(fert.data.3, Phylum=="Cnidaria")$Sperm.per.mL) %>%
purrr::set_names(c("fit", "lwr", "upr", "ph", "sperm"))
fert.data.3 %>%
filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.cnid.df$fit)) +
ggtitle("Cnidaria") +
xlab("Experimental pH") + ylab("Fertilization %") +
theme_minimal()
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_path).
fert.data.3 %>%
filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
#geom_line(aes(pH.experim, predict.cnid.df$fit)) +
ggtitle("Cnidaria") +
xlab("Experimental pH") + ylab("Fertilization %") +
theme_minimal()
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
fert<-subset(fert.data.3, Phylum=="Crustacean")$Perc.Fertilization
ph<-subset(fert.data.3, Phylum=="Crustacean")$pH.experim
taxa<-subset(fert.data.3, Phylum=="Crustacean")$Taxa
#x <- c(6.6, 6.7, 6.8, 6.9, 7, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8, 8.1, 8.2, 8.3, 8.4)
summary(model0 <- lm(fert~ph)) #pH NOT sign. alone
##
## Call:
## lm(formula = fert ~ ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.36 -31.23 16.33 20.87 29.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -44.42 117.02 -0.380 0.709
## ph 13.10 15.28 0.857 0.404
##
## Residual standard error: 29.49 on 16 degrees of freedom
## Multiple R-squared: 0.04388, Adjusted R-squared: -0.01588
## F-statistic: 0.7343 on 1 and 16 DF, p-value: 0.4041
summary(model1 <- lm(fert~taxa)) #taxa sign. alone
##
## Call:
## lm(formula = fert ~ taxa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.380 -2.553 2.936 13.592 24.562
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.97 10.68 1.589 0.13296
## taxacopepod 49.74 12.33 4.034 0.00108 **
## taxacrab 49.93 18.50 2.699 0.01648 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.36 on 15 degrees of freedom
## Multiple R-squared: 0.5297, Adjusted R-squared: 0.467
## F-statistic: 8.446 on 2 and 15 DF, p-value: 0.003492
summary(model2 <- lm(fert~taxa+ph)) # pH.group not sign. after controlling for taxa intercept
##
## Call:
## lm(formula = fert ~ taxa + ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.008 -3.816 3.623 8.130 20.837
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -133.04 81.27 -1.637 0.123872
## taxacopepod 52.86 11.55 4.575 0.000432 ***
## taxacrab 49.93 17.15 2.912 0.011363 *
## ph 19.36 10.41 1.860 0.084055 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.8 on 14 degrees of freedom
## Multiple R-squared: 0.6228, Adjusted R-squared: 0.542
## F-statistic: 7.707 on 3 and 14 DF, p-value: 0.002791
anova(model1,model2) #pH does not improve the taxa only model
## Analysis of Variance Table
##
## Model 1: fert ~ taxa
## Model 2: fert ~ taxa + ph
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 15 6843.2
## 2 14 5487.5 1 1355.7 3.4588 0.08406 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3 <- lm(fert~taxa*ph)) # pH.group not sign. after controlling for taxa intercept
##
## Call:
## lm(formula = fert ~ taxa * ph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.838 -0.120 2.592 7.777 20.989
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.00 325.50 0.034 0.974
## taxacopepod -93.21 337.36 -0.276 0.787
## taxacrab -328.50 563.79 -0.583 0.571
## ph 0.77 41.98 0.018 0.986
## taxacopepod:ph 18.85 43.57 0.433 0.673
## taxacrab:ph 48.83 72.71 0.672 0.515
##
## Residual standard error: 20.99 on 12 degrees of freedom
## Multiple R-squared: 0.6367, Adjusted R-squared: 0.4853
## F-statistic: 4.205 on 5 and 12 DF, p-value: 0.01931
anova(model1, model3) #pH interacion does not improve the taxa only model
## Analysis of Variance Table
##
## Model 1: fert ~ taxa
## Model 2: fert ~ taxa * ph
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 15 6843.2
## 2 12 5286.6 3 1556.7 1.1778 0.3591
summary(model4 <- lm(fert~taxa+ph+I(ph^2)))
##
## Call:
## lm(formula = fert ~ taxa + ph + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.647 -2.847 2.097 7.278 26.587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3781.13 1850.50 -2.043 0.061838 .
## taxacopepod 64.97 12.18 5.335 0.000135 ***
## taxacrab 49.93 15.61 3.199 0.006981 **
## ph 977.86 485.91 2.012 0.065366 .
## I(ph^2) -62.87 31.87 -1.973 0.070146 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.02 on 13 degrees of freedom
## Multiple R-squared: 0.7098, Adjusted R-squared: 0.6204
## F-statistic: 7.947 on 4 and 13 DF, p-value: 0.001808
anova(model1, model4) #interesting - sign.
## Analysis of Variance Table
##
## Model 1: fert ~ taxa
## Model 2: fert ~ taxa + ph + I(ph^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 15 6843.2
## 2 13 4223.0 2 2620.2 4.0331 0.04338 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model5 <- lm(fert~taxa+ph+I(ph^2)+I(ph^3))) # <--- final model
##
## Call:
## lm(formula = fert ~ taxa + ph + I(ph^2) + I(ph^3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.619 -6.728 1.979 5.331 35.003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 105184.42 57490.39 1.830 0.092251 .
## taxacopepod 91.46 17.86 5.122 0.000252 ***
## taxacrab 49.93 14.25 3.504 0.004351 **
## ph -43017.45 23206.21 -1.854 0.088515 .
## I(ph^2) 5843.13 3114.80 1.876 0.085195 .
## I(ph^3) -263.63 139.03 -1.896 0.082265 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.46 on 12 degrees of freedom
## Multiple R-squared: 0.7767, Adjusted R-squared: 0.6836
## F-statistic: 8.346 on 5 and 12 DF, p-value: 0.001323
anova(model4, model5) #not a sign. improvement
## Analysis of Variance Table
##
## Model 1: fert ~ taxa + ph + I(ph^2)
## Model 2: fert ~ taxa + ph + I(ph^2) + I(ph^3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 13 4223.0
## 2 12 3249.4 1 973.6 3.5955 0.08226 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model0, model1, model2, model3, model4, model5)
## df AIC
## model0 3 176.7832
## model1 4 166.0134
## model2 5 164.0392
## model3 7 167.3677
## model4 6 161.3244
## model5 7 158.6070
hist(model5$residuals) #kinda normal
plot(model5) #not much data, but residuals look okay
# lm.crust <- lm(fert~taxa+ph+I(ph^2)+I(ph^3), na.action = na.exclude) #over fit
lm.crust <- lm(fert~taxa+ph+I(ph^2))
summary(lm.crust)
##
## Call:
## lm(formula = fert ~ taxa + ph + I(ph^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.647 -2.847 2.097 7.278 26.587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3781.13 1850.50 -2.043 0.061838 .
## taxacopepod 64.97 12.18 5.335 0.000135 ***
## taxacrab 49.93 15.61 3.199 0.006981 **
## ph 977.86 485.91 2.012 0.065366 .
## I(ph^2) -62.87 31.87 -1.973 0.070146 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.02 on 13 degrees of freedom
## Multiple R-squared: 0.7098, Adjusted R-squared: 0.6204
## F-statistic: 7.947 on 4 and 13 DF, p-value: 0.001808
predict.crust <- predict(lm.crust, interval = 'confidence')
predict.crust.df <- cbind(as.data.frame(predict.crust),
subset(fert.data.3, Phylum=="Crustacean")$pH.experim,
subset(fert.data.3, Phylum=="Crustacean")$Taxa) %>%
purrr::set_names(c("fit", "lwr", "upr", "ph", "taxa"))
predict.crust.df$taxa <- factor(predict.crust.df$taxa, levels=c("copepod", "crab", "amphipod"))
fert.data.3 %>%
filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
ggtitle("Crustacea") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name="Taxa",
values=c(amphipod="#e41a1c",
copepod="#4daf4a",
crab="#ff7f00")) +
theme_minimal()
fert.data.3 %>%
filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
#geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
ggtitle("Crustacea") +
xlab("Experimental pH") + ylab("Fertilization %") +
theme_minimal()
fert.data.3 %>%
group_by(Taxa) %>%
summarise(count=n_distinct(Author))
## # A tibble: 12 x 2
## Taxa count
## <fct> <int>
## 1 abalone 2
## 2 amphipod 2
## 3 clam 5
## 4 copepod 4
## 5 coral 5
## 6 crab 2
## 7 mussel 4
## 8 oyster 4
## 9 Sand dollar 1
## 10 scallop 3
## 11 Sea star 2
## 12 Sea urchin 13
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#library(grid)
plot.mollusca <- fert.data.3 %>%
filter(Phylum=="Mollusca") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.mollusc.df$fit, col=predict.mollusc.df$taxa)) +
ggtitle("Mollusca") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name="Taxa (n = # studies)",
values=c(abalone="#e41a1c",mussel="#4daf4a",scallop="#ff7f00",oyster="#984ea3",clam='#377eb8'),
labels = c("abalone (n=2)", "mussel (n=4)", "scallop (n=3)", "oyster (n=4)", "clam (n=5)")) +
theme_minimal()
ggsave(filename = "fert.mollusca.pdf", width = 6, height = 4)
plot.echin <- fert.data.3 %>%
filter(Phylum=="Echinodermata") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.echino.df$fit, col=predict.echino.df$taxa)) +
ggtitle("Echinodermata") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name="Taxa (n = # studies)",
values=c(`Sand dollar`="#e41a1c",
`Sea star`="#4daf4a",
`Sea urchin`="#ff7f00"),
labels = c("Sand dollar (n=1)", "Sea star (n=2)", "Sea urchin (n=13)")) +
theme_minimal()
ggsave(filename = "fert.echinodermata.pdf", width = 6, height = 4)
plot.cnid <- fert.data.3 %>%
filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
#geom_line(aes(pH.experim, predict.cnid.df$fit)) +
ggtitle("Cnidaria (n=5 studies)") +
xlab("Experimental pH") + ylab("Fertilization %") +
theme_minimal()
ggsave(filename = "fert.cnidaria.pdf", width = 5, height = 4)
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
plot.cnid.overfit <- fert.data.3 %>%
filter(Phylum=="Cnidaria") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.cnid.df$fit)) +
ggtitle("Cnidaria - overfit (n=5 studies)") +
xlab("Experimental pH") + ylab("Fertilization %") +
theme_minimal()
ggsave(filename = "fert.cnidaria.overfit.pdf", width = 5, height = 4)
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_path).
plot.crust <- fert.data.3 %>%
filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
geom_smooth(method="lm", se=F, col="#4daf4a", size=0.6) +
#geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
ggtitle("Crustacea (n=8 studies)") +
xlab("Experimental pH") + ylab("Fertilization %") +
theme_minimal()
ggsave(filename = "fert.crustacea.pdf", width = 5, height = 4)
plot.crust.overfit <- fert.data.3 %>%
filter(Phylum=="Crustacean") %>%
ggplot(mapping=aes(x=pH.experim, y=Perc.Fertilization, group=Taxa, text=`Common name`)) +
geom_jitter(size=1.2, width=0.03) +
#facet_wrap(~Phylum, scales="free") + theme_minimal() +
#geom_smooth(method="lm", se=F, aes(col=Taxa), formula=y ~ poly(x, 2, raw=TRUE)) +
geom_line(aes(pH.experim, predict.crust.df$fit, col=predict.crust.df$taxa)) +
ggtitle("Crustacea - overfit?") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name="Taxa (n = # studies)",
values=c(amphipod="#e41a1c",
copepod="#4daf4a",
crab="#ff7f00"),
labels = c("amphipod (n=2)", "copepod (n=4)", "crab (n=2)")) +
theme_minimal()
ggsave(filename = "fert.crustacea.overfit.pdf", width = 6, height = 4)
pdf("fert.all.pdf", height = 11, width = 15)
grid.arrange(plot.mollusca, plot.echin, plot.cnid, plot.crust, plot.cnid.overfit, plot.crust.overfit, ncol=2)
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_path).
dev.off()
## quartz_off_screen
## 2
Insemination.mins 0.42319 -0.90604 0.3439 0.000999 ***
# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=Insemination.mins, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
ggtitle("Fertilization Rate ~ Insemination minutes by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 65 rows containing non-finite values (stat_smooth).
summary(aov(Perc.Fertilization ~ factor(Phylum)*Insemination.mins, fert.data.3)) # not sign. alone, but sign for some phyla
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Phylum) 2 22015 11007 14.131 1.18e-06 ***
## Insemination.mins 1 6 6 0.008 0.9288
## factor(Phylum):Insemination.mins 2 6310 3155 4.050 0.0182 *
## Residuals 394 306910 779
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 65 observations deleted due to missingness
egg.pre.exp.time 0.82583 0.56392 0.2503 0.001998 **
# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=log(egg.pre.exp.time), y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
ggtitle("Fertilization Rate ~ Egg pre-exposure minutes by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 266 rows containing non-finite values (stat_smooth).
fert.data.3$egg.pre.exp.time.log <- log(fert.data.3$egg.pre.exp.time+1)
summary(fert.data.3$egg.pre.exp.time.log)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.000 2.773 1.991 2.773 7.273 156
summary(aov(Perc.Fertilization ~ factor(Phylum)*egg.pre.exp.time.log, fert.data.3)) # not sign. alone, but sign for some phyla
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Phylum) 2 2712 1356 1.911 0.149717
## egg.pre.exp.time.log 1 528 528 0.744 0.389184
## factor(Phylum):egg.pre.exp.time.log 1 8413 8413 11.855 0.000656
## Residuals 304 215740 710
##
## factor(Phylum)
## egg.pre.exp.time.log
## factor(Phylum):egg.pre.exp.time.log ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 156 observations deleted due to missingness
Sperm.per.mL 0.81881 0.57406 0.2492 0.001998 **
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=log(Sperm.per.mL+1), y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
ggtitle("Fertilization Rate ~ sperm concentration (log-trans) by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
fert.data.3$Sperm.per.mL.log <- log(fert.data.3$Sperm.per.mL+1)
summary(aov(Perc.Fertilization ~ factor(Phylum)*Sperm.per.mL.log, fert.data.3)) # not sign. alone, but sign for some phyla
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Phylum) 2 29032 14516 27.612 1.40e-11 ***
## Sperm.per.mL.log 1 537 537 1.022 0.313
## factor(Phylum):Sperm.per.mL.log 2 23492 11746 22.343 1.15e-09 ***
## Residuals 255 134059 526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 204 observations deleted due to missingness
pH groups: – 6-7.3 – 7.3-7.5 – 7.5-7.8 – 7.8-8.2
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=log(Sperm.per.mL+1), y=Perc.Fertilization, group=Phylum:pH.group, col=Phylum:pH.group, text=`Common name`, shape=pH.group)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum:pH.group)) +
ggtitle("Fertilization Rate ~ sperm concentration (log-trans) by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 204 rows containing non-finite values (stat_smooth).
## Warning: Factor `Phylum:pH.group` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `Phylum:pH.group` contains implicit NA, consider using
## `forcats::fct_explicit_na`
sperm.egg 0.20294 0.97919 0.0953 0.038961 *
# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=sperm.egg, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
ggtitle("Fertilization Rate ~ sperm:egg ratio by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
summary(aov(Perc.Fertilization ~ factor(Phylum)*sperm.egg, fert.data.3)) # sign. main and interaction effects
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Phylum) 2 16503 8252 29.06 1.16e-11 ***
## sperm.egg 35 135929 3884 13.68 < 2e-16 ***
## factor(Phylum):sperm.egg 1 9519 9519 33.52 3.09e-08 ***
## Residuals 180 51119 284
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 246 observations deleted due to missingness
n.females 0.90918 0.41640 0.2463 0.001998 **
# plot % fert ~ pH.experim by Phylum
ggplotly(fert.data.3 %>%
ggplot(mapping=aes(x=n.females, y=Perc.Fertilization, group=Phylum, col=Phylum, text=`Common name`)) +
geom_point(size=1.5, width=0.02) +
#facet_wrap(~Taxa) +
geom_smooth(method="lm", se=TRUE, aes(fill=Phylum)) +
ggtitle("Fertilization Rate ~ No. females used for assay, by phylum"))
## Warning: Ignoring unknown parameters: width
## Warning: Removed 97 rows containing non-finite values (stat_smooth).
summary(aov(Perc.Fertilization ~ factor(Phylum)*n.females, fert.data.3)) # sign. main effect, not interaction
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Phylum) 2 14574 7287 9.605 8.62e-05 ***
## n.females 1 12866 12866 16.957 4.74e-05 ***
## factor(Phylum):n.females 2 2094 1047 1.380 0.253
## Residuals 362 274654 759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 97 observations deleted due to missingness
summary(aov(Perc.Fertilization ~ factor(Phylum)*(pH.experim + Insemination.mins + egg.pre.exp.time.log + Sperm.per.mL.log + n.females), fert.data.3)) #
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(Phylum) 2 3959 1979 7.094 0.001069
## pH.experim 1 3416 3416 12.242 0.000582
## Insemination.mins 1 153 153 0.548 0.459852
## egg.pre.exp.time.log 1 1474 1474 5.284 0.022615
## Sperm.per.mL.log 1 4207 4207 15.079 0.000142
## n.females 1 7482 7482 26.815 5.68e-07
## factor(Phylum):pH.experim 2 995 497 1.782 0.171047
## factor(Phylum):Insemination.mins 1 10689 10689 38.308 3.64e-09
## factor(Phylum):egg.pre.exp.time.log 1 2883 2883 10.333 0.001536
## factor(Phylum):Sperm.per.mL.log 1 3397 3397 12.173 0.000603
## factor(Phylum):n.females 1 794 794 2.847 0.093164
## Residuals 190 53015 279
##
## factor(Phylum) **
## pH.experim ***
## Insemination.mins
## egg.pre.exp.time.log *
## Sperm.per.mL.log ***
## n.females ***
## factor(Phylum):pH.experim
## factor(Phylum):Insemination.mins ***
## factor(Phylum):egg.pre.exp.time.log **
## factor(Phylum):Sperm.per.mL.log ***
## factor(Phylum):n.females .
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 261 observations deleted due to missingness